2025 R/Medicine Challenge: Measles Vaccinations and Outbreak Trends

Published

May 5, 2025

Code
load_data <- function(url) {
  tryCatch(
    {
      df <- read_csv(url, show_col_types = FALSE) %>%
        janitor::clean_names() # Standardize column names
      message("Successfully loaded and cleaned data from: ", basename(url))
      return(df)
    },
    error = function(e) {
      message("Failed to load data from: ", url)
      message("Error: ", e$message)
      return(NULL)
    }
  )
}

# --- Data URLs ---
base_url <- "https://raw.githubusercontent.com/fbranda/measles/main/"
urls <- list(
  global_coverage = paste0(base_url, "Measles_vaccination_coverage_Global.csv"),
  global_cases = paste0(base_url, "Measles_Global.csv"),
  europe_cases = paste0(base_url, "Measles_Europe.csv"),
  us_coverage_cases = paste0(base_url, "USA/data/all/measles-USA-by-mmr-coverage.csv"),
  us_onset = paste0(base_url, "USA/data/all/measles-USA-by-onset-date.csv"),
  us_year = paste0(base_url, "USA/data/all/measles-USA-by-year.csv"),
  us_age_2025 = paste0(base_url, "USA/data/2025/measles-USA-by-age.csv"),
  us_county_2025 = paste0(base_url, "USA/data/2025/measles-USA-by-county-timeline.csv"),
  us_state_2025 = paste0(base_url, "USA/data/2025/measles-USA-by-state-timeline.csv"),
  us_confirmed_2025 = paste0(base_url, "USA/data/2025/measles-USA-confirmed-cases.csv")
)

#----Load Data----
data <- lapply(urls, load_data)
Successfully loaded and cleaned data from: Measles_vaccination_coverage_Global.csv
Successfully loaded and cleaned data from: Measles_Global.csv
New names:
Successfully loaded and cleaned data from: Measles_Europe.csv
Successfully loaded and cleaned data from: measles-USA-by-mmr-coverage.csv
Successfully loaded and cleaned data from: measles-USA-by-onset-date.csv
Successfully loaded and cleaned data from: measles-USA-by-year.csv
Successfully loaded and cleaned data from: measles-USA-by-age.csv
Successfully loaded and cleaned data from: measles-USA-by-county-timeline.csv
Successfully loaded and cleaned data from: measles-USA-by-state-timeline.csv
Successfully loaded and cleaned data from: measles-USA-confirmed-cases.csv
• `` -> `...1`
Code
girai_data <- read_excel("GIRAI_2024_Edition_Data.xlsx")
girai_data2 <- girai_data %>%
  rename(iso3 = ISO3, region = GIRAI_region) %>%
  group_by(iso3,region) %>%
  distinct(iso3,region) %>%
  select(iso3,region)

Introduction

Changing attitudes towards vaccination have led to declining MMR coverage, putting herd immunity at risk. This document explores trends in vaccination, outbreaks using data from the 2025 R/Medicine Challenge.

Exploratory Data Analysis

Global Measles Vaccination Coverage

Code
data$global_coverage <- 
  inner_join(data$global_coverage ,girai_data2, by="iso3")
coverage_summary <- data$global_coverage %>%
      filter(!is.na(antigen) & !is.na(region.y)) %>%
      group_by(region.y, year) %>%
      summarise(avg_coverage = mean(coverage, na.rm = TRUE), .groups = 'drop')
    
    p1_1 <- plot_ly(coverage_summary, x = ~year, y = ~avg_coverage, color = ~region.y,
                    type = 'scatter', mode = 'lines+markers',
                    text = ~paste("Region:", region.y, "<br>Year:", year, "<br>Coverage:", round(avg_coverage, 1), "%"),
                    hoverinfo = 'text') %>%
      layout(title = "Avg. Vaccination Coverage by Region", yaxis = list(range = c(0,100), title="Avg Coverage (%)"), xaxis = list(title="Year"))
    p1_1
Code
latest_year_global_cov <- max(coverage_summary$year, na.rm = TRUE)
coverage_stats <- coverage_summary %>%
  filter(year == latest_year_global_cov) %>%
  summarise(Min = min(avg_coverage), Mean = mean(avg_coverage), Max = max(avg_coverage))
paste("Summary Coverage Stats for", latest_year_global_cov, ":")
[1] "Summary Coverage Stats for 2022 :"
Code
coverage_stats
# A tibble: 1 × 3
    Min  Mean   Max
  <dbl> <dbl> <dbl>
1  74.2  83.7  91.2

Average Vaccination rate has fluctuated over the years. The 2020 saw a drop in many regions. Overall vaccination rate for 2022 is around 83.65%.

Global Measles Cases

Code
data$global_cases_update <- data$global_cases %>% 
  pivot_longer(
    cols = !c(region,iso3,country, year), 
    names_to = "month", 
    values_to = "cases"
  )
    global_cases_summary <- data$global_cases_update %>%
      group_by(year) %>%
      summarise(total_cases = sum(cases, na.rm = TRUE), .groups = 'drop')
    
    p2_1 <- plot_ly(global_cases_summary, x = ~year, y = ~total_cases,
                    type = 'scatter', mode = 'lines+markers',
                    text = ~paste("Year:", year, "<br>Total Cases:", scales::comma(total_cases)),
                    hoverinfo = 'text') %>%
      layout(title = "Total Reported Global Measles Cases", yaxis = list(title="Total Cases"), xaxis = list(title="Year")) 
    p2_1

Highest number of cases peaked in 2019 but has been on the decline. There has been an uptick since 2021.

European Measles Cases

Code
europe_heatmap_data <- data$europe_cases %>% filter (indicator == 'Reported confirmed cases', !region_name %in% c('EU/EEA (without UK)','EU/EEA (with UK until 2019)'))
      
    
p3_1 <- plot_ly(europe_heatmap_data, x = ~time, y = ~region_name, z = ~num_value,
                    type = "heatmap", colorscale = "Viridis", # Choose a colorscale
                    hoverinfo = 'text',
                    text = ~paste("Country:", region_name, "<br>Year:", time, "<br>Cases:", num_value)) %>%
      layout(title = "Measles Cases Heatmap in Europe",
             xaxis = list(title = "Year", type = "category"),
             yaxis = list(title = "Country", type = "category"),
             zaxis = list(title = "Numnber of cases"))
p3_1
Warning: 'layout' objects don't have these attributes: 'zaxis'
Valid attributes include:
'_deprecated', 'activeshape', 'annotations', 'autosize', 'autotypenumbers', 'calendar', 'clickmode', 'coloraxis', 'colorscale', 'colorway', 'computed', 'datarevision', 'dragmode', 'editrevision', 'editType', 'font', 'geo', 'grid', 'height', 'hidesources', 'hoverdistance', 'hoverlabel', 'hovermode', 'images', 'legend', 'mapbox', 'margin', 'meta', 'metasrc', 'modebar', 'newshape', 'paper_bgcolor', 'plot_bgcolor', 'polar', 'scene', 'selectdirection', 'selectionrevision', 'separators', 'shapes', 'showlegend', 'sliders', 'smith', 'spikedistance', 'template', 'ternary', 'title', 'transition', 'uirevision', 'uniformtext', 'updatemenus', 'width', 'xaxis', 'yaxis', 'barmode', 'bargap', 'mapType'

Europe has seen relatively low number of measles cases. It has had an average vaccination rate of over 80%.

US Measles Cases by Year

Code
    p4_1 <- plot_ly(data$us_year, x = ~year, y = ~cases, type = 'scatter', mode = 'lines+markers',
                    text = ~paste("Year:", year, "<br>Cases:", scales::comma(cases)),
                    hoverinfo = 'text') %>%
      layout(title = "Total Reported US Measles Cases by Year",
             xaxis = list(title = "Year"), yaxis = list(title = "Number of Cases"))
    p4_1

Measles cases have been down significantly since 1990.

2025 US Measles Cases by Age Group

Code
    age_levels <- c("< 5", "5_19", "> 20", "Unknown") # Adjust as per data
    data$us_age_2025$age_group <- factor(data$us_age_2025$age_group, levels = age_levels)
    
    p5_1 <- plot_ly(data$us_age_2025, x = ~age_group, y = ~case_count, type = 'bar',
                    text = ~paste("Age Group:", age_group, "<br>Cases:", case_count),
                    hoverinfo = 'text') %>%
      layout(title = "2025 Age Distribution of US Measles Cases",
             xaxis = list(title = "Age Group" , categoryorder = "array", categoryarray = ~levels(age_group) # Use if factor
             ),
             yaxis = list(title = "Number of Cases"))
    p5_1

Those between the ages of 5 and 19 are seem to be most impacted by the measles outbreak in 2025.

Top 5 2025 Measles Cases by US State

Code
    state_data <- data$us_state_2025 %>% mutate(date = as.Date(report_date))
    
    # Identify states with significant cases
    top_states <- state_data %>%
      group_by(state_name) %>%
      filter(date == max(date)) %>%
      ungroup() %>%
      arrange(desc(cases_count)) %>%
      slice_head(n = 5) %>% # Adjust N
      pull(state_name)
    
    p6_1 <- plot_ly(state_data %>% filter(state_name %in% top_states),
                    x = ~date, y = ~cases_count, color = ~state_name,
                    type = 'scatter', mode = 'lines',
                    text = ~paste("State:", state_name, "<br>Date:", date, "<br>Cases:", cases_count),
                    hoverinfo = 'text') %>%
      layout(title = "Top 5 - 2025 Measles Cases by State",
             xaxis = list(title = "Date"), yaxis = list(title = "Cases"))
    p6_1

Since early February 2025, Texas has been seeing a growing number of measles cases.

Top 5 2025 Measles Cases by US State Counties

Code
    county_data <- data$us_county_2025 %>%
      mutate(date = as.Date(report_date),
             county_state = paste(county_name, state_name, sep=", ")) # Unique identifier
    
    # Find top N counties by cases
    top_counties <- county_data %>%
      group_by(county_name) %>%
      filter(date == max(date)) %>%
      ungroup() %>%
      arrange(desc(cases_count)) %>%
      slice_head(n = 5) %>%
      pull(county_name)
    
    
p7_1 <- plot_ly(county_data %>% filter(county_name %in% top_counties),
                    x = ~date, y = ~cases_count, color = ~county_name,
                    type = 'scatter', mode = 'lines',
                    text = ~paste("County:", county_name, "<br>Date:", date, "<br>Cases:", cases_count),
                    hoverinfo = 'text') %>%
      layout(title = "Top 5 - 2025 Measles Cases by County",
             xaxis = list(title = "Date"), yaxis = list(title = "Cases"))
p7_1

Gaines County, Texas seems to be the epicenter of the measles outbreak since early February 2025.

Conclusion

This exploratory analysis highlights measles trends based on the data.

  • Average vaccination rates have fluctuated over the years. It has been trending downwards since 2020 with the exception of Asia & Oceania and the Carribean region.

  • In 2025, Gaines Country, Texas looks like the epicentre of the 2025 US measles outbreak. It is mostly impacting children between the ages of 5 and 19.